"""
参考 https://blog.csdn.net/m0_37870649/article/details/104689601
逐行读入数据
特征工程部分作了修改，只保留weekday和hour
"""
from LR_FTRL_machine import *
import _pickle as pickle
from datetime import datetime

# Hyper-parameters tuned on 1M rows of data
alpha = .1  # learning rate
beta = 1.  # smoothing parameter for adaptive learning rate
L1 = 1  # L1 regularization, larger value means more regularized
L2 = 0.5  # L2 regularization, larger value means more regularized

# C, feature/hash trick
D = 2 ** 20  # number of weights to use (size of the hash space)

# D, training/validation
epoch = 5  # learn training data for N passes
log_freq = 1000000  # report progress every `log_freq` rows
train = "./data/train"  # training-data path (loop-invariant, hoisted out of the epoch loop)

# initialize ourselves a learner
learner = ftrl_proximal(alpha, beta, L1, L2, D)

# start training
start = datetime.now()
for e in range(epoch):
    start1 = datetime.now()
    loss_train = 0.
    t = 0  # keeps the epoch summary below safe even if the file is empty
    # data() is a generator yielding one row at a time:
    # t is the row number, x the features, y the label
    for t, date, ID, x, y in data(train, D):
        # predict the click probability from the features
        p = learner.predict(x)
        # accumulate the logloss of this prediction
        loss_train += logloss(p, y)
        # feed features, prediction and label back to update the model
        learner.update(x, p, y)
        # periodically report training progress
        if t > 0 and t % log_freq == 0:
            loss_ave = loss_train / t
            print(e, t, loss_ave, str(datetime.now() - start1))
            start1 = datetime.now()
    if t > 0:  # guard against an empty training file (t would be 0 -> ZeroDivisionError)
        print("epoch", e, loss_train / t, str(datetime.now() - start))
    # persist the trained model after each epoch; `with` closes the file deterministically
    with open("model/model_ft1_e" + str(e), 'wb') as model_file:
        pickle.dump(learner, model_file)
